I'm going to see how scikit-learn's dimensionality reduction methods work on our data.



In [117]:

    
from pearce.emulator import OriginalRecipe, ExtraCrispy, SpicyBuffalo
from pearce.mocks import cat_dict
import numpy as np
from os import path



In [118]:

    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()



In [119]:

    
# HDF5 file handed to the emulator constructor and to emu.get_data below.
training_file = "/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5"
# Companion test-set file (not referenced by the cells visible in this notebook).
test_file = "/scratch/users/swmclau2/xi_zheng07_cosmo_test//PearceRedMagicXiCosmoFixedNd_Test.hdf5"
# Passed to the emulator as its `method` argument.
em_method = "gp"
# NOTE(review): not used by any visible cell -- confirm whether it is still needed.
split_method = "random"



In [120]:

    
# `a` is presumably the cosmological scale factor and `z` the matching
# redshift (z = 1/a - 1) -- confirm. With a = 1.0 this evaluates to z = 0.0.
a = 1.0
z = (1.0 / a) - 1.0



In [121]:

    
# Parameters pinned to a single value; passed to the emulator constructor below.
# NOTE(review): 24.06822623 is presumably one of the r bins in the training
# file -- confirm. A 'cosmo': 0 entry was previously included and is disabled.
fixed_params = dict(z=z, r=24.06822623)



In [122]:

    
# Seed numpy's global RNG first (presumably so the emulator's random
# downsampling is repeatable -- confirm against pearce).
np.random.seed(0)
emu = OriginalRecipe(
    training_file,
    method=em_method,
    fixed_params=fixed_params,
    custom_mean_function='linear',
    downsample_factor=0.02
)
# Disabled constructor option kept for reference:
#   hyperparams = {'n_estimators': 500, 'max_depth': 5}



In [123]:

    
# Same value that was given to the emulator constructor above; re-declared
# here before being passed to emu.get_data. A 'cosmo': 0 entry is disabled.
fixed_params = dict(z=z, r=24.06822623)



In [124]:

    
# Pull the data for the fixed parameters out of the training file. Only the
# first two of the four returned values are kept.
# NOTE(review): presumably x holds the input parameters and y the matching
# values -- confirm against pearce's get_data.
x, y, _, _ = emu.get_data(training_file,fixed_params)



In [125]:

    
# Show the dimensions of x. The call form prints the same text on Python 2
# and Python 3, whereas the bare print statement only parses on Python 2.
print(x.shape)









    



(40000, 11)



In [126]:

    
# scikit-learn pieces used below: the candidate embedders, a train/test
# splitter and feature standardisation.
from sklearn.manifold import Isomap, LocallyLinearEmbedding, SpectralEmbedding
from sklearn.decomposition import PCA
try:
    # Home of train_test_split from scikit-learn 0.18 onwards.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only ship the module that was removed in 0.20.
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import scale

# Standardise x with scale()'s defaults (per-column zero mean, unit variance).
x = scale(x)



In [127]:

    
# Keep a random 25% of the samples; the complementary splits are discarded.
# The fixed random_state makes re-running this cell pick the same subsample
# instead of drawing a new one from the global RNG each time.
x_train, _, y_train, _ = train_test_split(x, y, train_size=0.25, random_state=0)



In [146]:

    
# Active embedder: Isomap with 4 output components and 100 neighbours.
embedding = Isomap(n_neighbors=100, n_components=4)
# Alternatives (disabled) -- enable one of these instead of the line above:
#embedding = PCA(n_components=x.shape[1])
#embedding = LocallyLinearEmbedding(n_components=4, n_neighbors = 20)
#embedding = SpectralEmbedding(n_components = 4, n_neighbors = 10)



In [ ]:

    
# Fit the chosen embedder on the training subsample and keep the
# lower-dimensional coordinates it returns.
x_embeded = embedding.fit_transform(x_train)



In [ ]:

    
# Display the dimensions of the embedded array.
x_embeded.shape



In [ ]:

    
# Min-max rescale y_train: subtract its minimum, then divide by the largest
# shifted value, giving colour values between 0 and 1.
y_shifted = y_train - y_train.min()
y_color = y_shifted / y_shifted.max()



In [ ]:

    
# Build a 12-colour cubehelix palette, preview it as a strip, then make it
# seaborn's active palette.
# NOTE(review): scatter's `c=` values go through a colormap rather than the
# colour cycle, so this palette may not affect the scatter plots below -- confirm.
pal = sns.cubehelix_palette(n_colors=12)
sns.palplot(pal)
sns.set_palette(pal)



In [ ]:

    
# Embedding component 0 against component 1; point colour follows y_color.
plt.scatter(x_embeded[:, 0], x_embeded[:, 1], c=y_color, alpha=0.3)
# Label the axes so this panel can be told apart from the other component pairs.
plt.xlabel('embedding component 0')
plt.ylabel('embedding component 1');



In [ ]:

    
# Embedding component 1 against component 2; point colour follows y_color.
plt.scatter(x_embeded[:, 1], x_embeded[:, 2], c=y_color, alpha=0.3)
# Label the axes so this panel can be told apart from the other component pairs.
plt.xlabel('embedding component 1')
plt.ylabel('embedding component 2');



In [ ]:

    
# Embedding component 0 against component 2; point colour follows y_color.
plt.scatter(x_embeded[:, 0], x_embeded[:, 2], c=y_color, alpha=0.3)
# Label the axes so this panel can be told apart from the other component pairs.
plt.xlabel('embedding component 0')
plt.ylabel('embedding component 2');



In [ ]:

    
# One figure per embedding component: that component on the x-axis against
# y_train, coloured by y_color. `range` is used instead of `xrange` so the
# loop runs on both Python 2 and Python 3.
for i in range(x_embeded.shape[1]):
    plt.scatter(x_embeded[:, i], y_train, c=y_color)
    # Label each figure so the components can be told apart.
    plt.xlabel('embedding component %d' % i)
    plt.ylabel('y_train')
    plt.show()



In [ ]:



In [ ]: